import requests
# 导入
import lxml.html

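# Disabled example (wrapped in a bare triple-quoted string so it never runs):
# fetch a live page with requests, then extract its title and a paragraph's text via XPath.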
"""
devResponse  = requests.get("https://yueshushu.top/")
devContent = devResponse.content.decode()
# 构建 xpath 对象， 从 lxml.fromstring 中
selector = lxml.html.fromstring(devContent)
# 获取的是一个列表， 所以要用 [0] 表示取出来的是第一个
title = selector.xpath("//html/head/title/text()")[0]
print(f'title是: {title}')
content = selector.xpath("//*[@id='app']/div[1]/div/p/text()")[0]
content = content.replace('\r\n','<br/>')
print(f'内容是: {content}')

"""
# Demonstrates XPath attribute predicates (starts-with / contains) and chained,
# element-relative selection on a small static HTML document.
stockContent = '''<!DOCTYPE html>
<html lang="zh">
<head>
    <meta charset="UTF-8">
    <title>我的第一个HTML页面</title>
</head>
<body>
    <h1>欢迎来到我的网站</h1>
    <p>这里是一些我喜欢的东西：</p>
    <ul id = 'u1'>
        <li id ="li-1">编程</li>
        <li id ='li-2'>阅读</li>
        <li id ='li-3'>旅行</li>
    </ul>
</body>
</html>'''
# Echo the raw markup, then parse it into an lxml element tree.
print(f'打印内容{stockContent}')
selector = lxml.html.fromstring(stockContent)
# starts-with(): text of every <li> whose id begins with the prefix 'li-'.
labelList = selector.xpath("//li[starts-with(@id,'li-')]/text()")
print(f'打印label: {labelList}')
# contains(): text of every <li> whose id contains the substring 'li'.
labelList2 = selector.xpath("//li[contains(@id,'li')]/text()")
print(f'打印label: {labelList2}')
# Chained selection: first pick the <ul> elements, then query inside one of them.
u1 = selector.xpath("//ul")
# The second query must be relative ('./...'). A path starting with '//' is
# evaluated from the document root even when called on an element, so it would
# ignore the <ul> selected above and match <li> items of every <ul> in the page.
liList = u1[0].xpath('./li/text()')
print(f'liList {liList}')